urlprefix <- 'https://www.pro-football-reference.com/years/'
urlend <- '/draft.htm'
startyr <- 2000
endyr <- 2020
draftedPlayers <- data.frame()
for (i in startyr:endyr) {
URL <- paste(urlprefix, as.character(i), urlend, sep = "")
table <- URL %>%
read_html() %>%
html_node('table') %>%
html_table()
table$Year <- i
draftedPlayers <- rbind(table, draftedPlayers)
}
cols = names(draftedPlayers)
cols[1] = 'Rnd'
cols[2] = 'Pick'
cols[3] = 'Team'
cols[4] = 'Player'
cols[5] = 'Pos'
cols[6] = 'Age'
cols[7] = 'Last Year'
cols[8] = '1st Team All Pro'
cols[9] = 'Pro Bowl'
cols[10] = 'Years as Starter'
cols[11] = 'Career AV'
cols[12] = 'Draft Team AV'
cols[13] = 'Games Played'
cols[14] = 'Passes Completed'
cols[15] = 'Pass Attempts'
cols[16] = 'Passing Yds'
cols[17] = 'Passing TDs'
cols[18] = 'Interceptions Thrown'
cols[19] = 'Rushing Attepts'
cols[20] = 'Rushing Yds'
cols[21] = 'Rushing TDs'
cols[22] = 'Receptions'
cols[23] = 'Recieving Yds'
cols[24] = 'Recieving TDs'
cols[25] = 'Solo Tackles'
cols[26] = 'Interceptions (Def)'
cols[27] = 'Sacks (Def)'
cols[28] = 'College/Univ'
cols[29] = 'other'
cols[30] = 'Draft Year'
names(draftedPlayers) = cols
draftedPlayers <- draftedPlayers[draftedPlayers$Player != "Player",] #remove title rows
draftedPlayers <- select(draftedPlayers, -c('other')) #remove other
draftedPlayers$Team[draftedPlayers$Team == 'GNB'] <- 'GB'
draftedPlayers$Team[draftedPlayers$Team == 'KAN'] <- 'KC'
draftedPlayers$Team[draftedPlayers$Team %in% c( 'LVR', 'OAK')] <- 'LV'
draftedPlayers$Team[draftedPlayers$Team == 'NWE'] <- 'NE'
draftedPlayers$Team[draftedPlayers$Team == 'NOR'] <- 'NO'
draftedPlayers$Team[draftedPlayers$Team == 'SFO'] <- 'SF'
draftedPlayers$Team[draftedPlayers$Team == 'TAM'] <- 'TB'
draftedPlayers$Team[draftedPlayers$Team == 'SDG'] <- 'LAC'
draftedPlayers$Team[draftedPlayers$Team == 'STL'] <- 'LAR'
draftedPlayers <- draftedPlayers %>%
mutate_at(vars(-c(Team, Player, Pos, `College/Univ`)), funs(as.numeric(.)))
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
draftedPlayers$`College/Univ` <- gsub("St.", "State", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("Col.", "College", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub(" (FL)", "", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("North Carolina State", "NC State", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("Mississippi", "Ole Miss", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("North Carolina State", "NC State", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("Ole Miss State", "Mississippi State", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("Hawaii", "Hawai'i", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("Central Florida", "UCF", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("Southern Miss", "Southern Mississippi", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("Cal Poly-San Luis Obispo", "Cal Poly", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("California-Davis", "UC Davis", draftedPlayers$`College/Univ`, fixed=TRUE)
draftedPlayers$`College/Univ` <- gsub("Massachusetts", "UMass", draftedPlayers$`College/Univ`, fixed=TRUE)
teamInfo <- cfbd_team_info(conference = NULL, only_fbs = TRUE, year = NULL)
logos <- data.frame(matrix(unlist(teamInfo$logos), nrow=length(teamInfo$logos), byrow=TRUE),stringsAsFactors=FALSE)
teamInfo <- cbind(teamInfo, logos) %>%
rename(
logo1 = X1,
logo2 = X2
)
teamInfo <- teamInfo[ -c(12) ]
Seasons hack ensures that there is a 0 data point for years when no players are drafted.
BYUDraftTimeline <- draftedPlayers %>%
filter(`College/Univ` %in% c("BYU")) %>%
group_by(`Draft Year`) %>%
summarise(
numPicks = n()
)
seasonsHack <- draftedPlayers %>%
group_by(`Draft Year`) %>%
summarise(
blank = 0
)
BYUDraftTimeline <- merge(x = seasonsHack, y = BYUDraftTimeline, by = "Draft Year", all = TRUE)
BYUDraftTimeline[is.na(BYUDraftTimeline)] <- 0
BYUDraftTimeline <- BYUDraftTimeline %>%
filter(`Draft Year` >= 2005)
rm(seasonsHack)
BYUDraftTimeline[nrow(BYUDraftTimeline) + 1,] <- c(2021, 0, 5)
graph <- BYUDraftTimeline %>%
ggplot(aes(x = `Draft Year`, y = numPicks, color = "#001E4C", linetype = factor(ifelse(`Draft Year`==2021,"Solid", "dotted")))) +
geom_line(size = 1) +
geom_segment(x = 2020, y = 0, xend = 2021, yend = 5, color="#001E4C", linetype="dotted", size = 1) +
geom_point(alpha=.7) +
scale_colour_identity() +
xlim(2005, 2021) +
ylim(0, 5) +
theme_fivethirtyeight() +
theme(
legend.position = "none",
legend.title = element_blank(),
strip.text = element_text(face = "bold"),
axis.title.y = element_text(),
axis.title.x = element_text(),
axis.text.x = element_text(angle = 45, vjust = .7, face = "bold"),
panel.grid.major.x = element_blank()
) +
labs(
y = "Draft Picks",
x = "Season",
title = "BYU draft picks by season",
subtitle = "The Cougars haven't had more than one player selected in the draft since 2009",
caption = "Data: @pfrer | Plot: @LauraStickells"
) +
scale_x_continuous(breaks = seq(min(BYUDraftTimeline$`Draft Year`), max(BYUDraftTimeline$`Draft Year`), by = 1))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
graph
all_schools <- draftedPlayers %>%
filter(!is.na(`College/Univ`), !is.na(`Career AV`), !is.na(Pick), `Draft Year` > 2000) %>%
group_by(`College/Univ`) %>%
summarise(
n = n()
) %>%
filter(n >= 13) %>%
select(`College/Univ`)
all_schools <- all_schools[['College/Univ']]
cor_p_table <- data.frame(matrix(ncol=3, nrow=0, dimnames=list(NULL, c("School", "Cor", "pvalue"))))
for (i in all_schools) {
temp_table <- data.frame()
temp_table <- draftedPlayers %>%
filter(`College/Univ` == i, `Draft Year` > 2010)
res <- cor.test(temp_table$Pick, temp_table$`Career AV`)
new_row <- c(i, round(res$estimate, 2), round(res$p.value, 3))
cor_p_table[nrow(cor_p_table) + 1, ] <- new_row
}
cor_p_table <- transform(cor_p_table, Cor = as.numeric(Cor))
cor_p_table <- transform(cor_p_table, pvalue = as.numeric(pvalue))
schoolAverageValue <- draftedPlayers %>%
filter(!is.na(`College/Univ`), !is.na(`Career AV`), !is.na(Pick), `Draft Year` >= 2010) %>%
group_by(`College/Univ`) %>%
summarise(
n = n(),
avg = mean(`Career AV`)
) %>%
filter(n >= 5)
draftedPlayers %>%
filter(`College/Univ` == "BYU", `Draft Year` >= 2010) %>%
select(Player)
## # A tibble: 7 x 1
## Player
## <chr>
## 1 Sione Takitaki
## 2 Fred Warner
## 3 Jamaal Williams
## 4 Bronson Kaufusi
## 5 Kyle Van Noy
## 6 Ezekiel Ansah
## 7 Dennis Pitta
BYUplayerInfo <- draftedPlayers %>%
filter(`College/Univ` == "BYU", `Draft Year` >= 2010)
player <- 'Fred Warner'
position <- BYUplayerInfo$Pos[BYUplayerInfo$Player== player]
year <- BYUplayerInfo$`Draft Year`[BYUplayerInfo$Player== player]
pick <- BYUplayerInfo$Pick[BYUplayerInfo$Player== player]
pickedBeforeData <- draftedPlayers %>%
filter(`Pos` == position, `Draft Year` == year, Pick <= pick)
pickedBeforeData[is.na(pickedBeforeData)] <- 0
meanPickedBeforeData <- mean(pickedBeforeData$`Career AV`)
pickedBeforeGraph <- pickedBeforeData %>%
ggplot(aes(x = reorder(Player, Pick) , y = `Career AV`, fill = factor(ifelse(`College/Univ`=="BYU","Blue","Normal")))) +
geom_col(alpha = 0.7) +
scale_fill_manual(name = "College/Univ", values=c("#001E4C","grey50")) +
geom_hline(yintercept = meanPickedBeforeData, color = "black", linetype = "dashed") +
geom_label(aes(label = paste('Rnd', Rnd, '|', 'Pk', Pick, '\n', 'Career AV:', `Career AV`)), fill = "white", vjust = -0.1, size = 3, fontface = "bold") +
ylim(0, max(pickedBeforeData$`Career AV`, na.rm = TRUE)+10) +
ggthemes::theme_fivethirtyeight() +
theme(
legend.position = "none",
strip.text = element_text(face = "bold"),
axis.title.y = element_text(),
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45, vjust = .7, face = "bold"),
panel.grid.major.x = element_blank()
) +
labs(
y = "Career AV",
title = paste(player, " Career AV compared to ", position, "s picked before him in ", year, sep = ""),
subtitle = "",
caption = "Data: @ProFootballReference | Plot: @LauraStickells"
)
pickedBeforeGraph